# ============================================================
# SCRIPT R: LPM Models — FPA Voting Discipline and Agribusiness
#           Campaign Financing (Robustness Checks)
#
# Paper: "Green Rhetoric, Conservative Votes: Analyzing Brazil's
#         Agribusiness Caucus on Environmental Legislation"
#
# 
#
# FPA membership definitions:
#   lista_FPA      : FPA members following Rey
#   Membro_FPA     : Official FPA membership (Brazilian Congress) [baseline]
#   membro_fpa     : FPA members per Cascione (2018) MA thesis
#   diap_ruralista : Ruralista caucus members (DIAP)
#
# Roll-call votes (55th Legislature, 2015-2018):
#   2015001 = PL 4330/2004
#   2015004 = PL 863/2015
#   2015005 = MP 682/2015
#   2015006 = MP 687/2015
#   2016001 = MP 692/2015
#   2016002 = MP 694/2015
#
# Author: [your name]
# Last updated: February 2026
# ============================================================

# ============================================================
# 0. SETUP
# ============================================================

library(haven)
library(tidyverse)
library(sandwich)
library(lmtest)
library(broom)
library(modelsummary)
library(patchwork)

# ---- Load and prepare data ---------------------------------
df <- read_dta("base_figs_and_models.dta") |>
  select(Id_Votacao, IndDisFPA, Coalizao_GOV, cpf,
         lista_FPA, Membro_FPA, membro_fpa, diap_ruralista,
         deputado_agro_total, frac_agro_total,
         deputado_agropecuaria, deputado_agroindustria, deputado_vertical,
         frac_agropecuaria, frac_agroindustria, frac_vertical,
         agro_total) |>
  mutate(across(where(haven::is.labelled), haven::as_factor)) |>
  filter(Id_Votacao %in% c(2015001, 2015004, 2015005, 2015006,
                           2016001, 2016002)) |>
  mutate(
    IndDisFPA      = as.numeric(as.character(IndDisFPA)    == "Divergent"),
    Coalizao_GOV   = as.numeric(as.character(Coalizao_GOV) == "Government"),
    lista_FPA      = as.numeric(as.character(lista_FPA)    == "FPA"),
    Membro_FPA             = as.numeric(Membro_FPA),
    membro_fpa             = as.numeric(membro_fpa),
    diap_ruralista         = as.numeric(diap_ruralista),
    deputado_agro_total    = as.numeric(deputado_agro_total),
    deputado_agropecuaria  = as.numeric(deputado_agropecuaria),
    deputado_agroindustria = as.numeric(deputado_agroindustria),
    deputado_vertical      = as.numeric(deputado_vertical),
    frac_agropecuaria      = as.numeric(frac_agropecuaria),
    frac_agroindustria     = as.numeric(frac_agroindustria),
    frac_vertical          = as.numeric(frac_vertical),
    agro_total             = as.numeric(agro_total)
  ) |>
  
  # ── NEW: predominant agribusiness funding type ──────────────
  # For each legislator, identify which of the three sub-sectors
  # represents the LARGEST share of their total campaign revenue.
  # Legislators with no agribusiness donations = reference category.
  #
  # Note: frac_agropecuaria + frac_agroindustria + frac_vertical
  #       = frac_agro_total (verified in data, 100% of cases).
  # Ties are broken by order of the case_when conditions (rare;
  # < 2% of cases have two fractions within 1 p.p. of each other).
  mutate(
    tipo_predominante = case_when(
      agro_total == 0 | is.na(agro_total)              ~ "No agro funding",
      frac_agropecuaria >= frac_agroindustria &
        frac_agropecuaria >= frac_vertical             ~ "Agriculture & Livestock",
      frac_agroindustria >= frac_agropecuaria &
        frac_agroindustria >= frac_vertical            ~ "Agroindustry",
      TRUE                                             ~ "Integrated Firms"
    ),
    # Set reference level: "No agro funding"
    tipo_predominante = factor(
      tipo_predominante,
      levels = c("No agro funding",
                 "Agriculture & Livestock",
                 "Agroindustry",
                 "Integrated Firms")
    )
  )

# ---- Sanity check: distribution of new variable -------------
message("\n=== Distribution of tipo_predominante (all observations) ===")
print(df |> distinct(Id_Votacao, .keep_all = FALSE) |> nrow() |>
        (\(.) message("Votes in data: ", .))())
print(table(df$tipo_predominante, useNA = "ifany"))

# ---- Vote labels --------------------------------------------
vote_labels <- c(
  "2015001" = "PL 4330/2004",
  "2015004" = "PL 863/2015",
  "2015005" = "MP 682/2015",
  "2015006" = "MP 687/2015",
  "2016001" = "MP 692/2015",
  "2016002" = "MP 694/2015"
)

# ---- Constants for all models -------------------------------
VOTES   <- c(2015001, 2015004, 2015005, 2015006, 2016001, 2016002)
DEFS    <- c("lista_FPA", "Membro_FPA", "membro_fpa", "diap_ruralista")
DEF_LBL <- c(
  lista_FPA      = "Rey (2021)",
  Membro_FPA     = "Official (Congress)",
  membro_fpa     = "Cascione (2018)",
  diap_ruralista = "DIAP Ruralista"
)

# ---- Publication theme --------------------------------------
theme_paper <- function() {
  theme_bw(base_size = 12, base_family = "Helvetica") +
    theme(
      panel.grid.minor   = element_blank(),
      panel.grid.major.y = element_blank(),
      legend.position    = "bottom",
      legend.text        = element_text(size = 11),
      legend.key.size    = unit(0.5, "cm"),
      strip.background   = element_rect(fill = "grey92"),
      strip.text         = element_text(size = 12, face = "bold"),
      axis.text          = element_text(size = 11, colour = "black"),
      axis.title         = element_text(size = 12),
      plot.title         = element_text(size = 12, face = "bold"),
      plot.caption       = element_text(size = 9, colour = "grey40")
    )
}

grey_palette_4 <- c("#000000", "#555555", "#999999", "#CCCCCC")
def_shapes     <- c(15, 16, 17, 18)





# ============================================================
# FIGURE 5 — Proportion of votes aligned with FPA leadership,
#            by donor type and environmental bill
# ============================================================

# ── ATENÇÃO: os votos ambientais não estão no arquivo base_figs_and_models.dta
# Carregue aqui o arquivo que contém os votos ambientais.
# Ajuste o nome do arquivo e os IDs abaixo conforme sua base.

df_env <- read_dta("base_figs_and_models.dta") |>   # <-- substituir pelo arquivo correto
  mutate(across(where(haven::is.labelled), haven::as_factor)) |>
  mutate(
    IndDisFPA    = as.numeric(as.character(IndDisFPA) == "Divergent"),
    lista_FPA    = as.numeric(as.character(lista_FPA) == "FPA"),
    frac_agropecuaria  = as.numeric(frac_agropecuaria),
    frac_agroindustria = as.numeric(frac_agroindustria),
    frac_vertical      = as.numeric(frac_vertical),
    agro_total         = as.numeric(agro_total),
    tipo_predominante  = case_when(
      agro_total == 0 | is.na(agro_total)              ~ "No agro funding",
      frac_agropecuaria >= frac_agroindustria &
        frac_agropecuaria >= frac_vertical             ~ "Agriculture & Livestock",
      frac_agroindustria >= frac_agropecuaria &
        frac_agroindustria >= frac_vertical            ~ "Agroindustry",
      TRUE                                             ~ "Integrated Firms"
    ),
    tipo_predominante = factor(
      tipo_predominante,
      levels = c("No agro funding", "Agriculture & Livestock",
                 "Agroindustry", "Integrated Firms")
    )
  )

# ── Confirme os IDs dos votos ambientais no seu arquivo:
message("IDs disponíveis no arquivo:")
print(sort(unique(df_env$Id_Votacao)))
# Substitua os valores abaixo pelos IDs corretos após verificar:
ENV_VOTES <- c(2015001, 2015004, 2015005, 2015006, 2016001, 2016002)   # <-- preencher com IDs dos votos ambientais

env_labels <- c(
  # "<ID>" = "<nome do projeto>"  -- preencher conforme IDs confirmados
  "..." = "PL 4148/2008",
  "..." = "MP 756/2016",
  "..." = "MP 759/2016",
  "..." = "MP 795/2017",
  "..." = "MP 809/2017",
  "..." = "PLP 163/2015"
)

# ---- Build dataset ------------------------------------------
fig5_base <- df_env |>
  filter(lista_FPA == 1, Id_Votacao %in% ENV_VOTES) |>
  mutate(
    vote_with_fpa = 1 - IndDisFPA,
    bill_label = factor(env_labels[as.character(Id_Votacao)],
                        levels = env_labels)
  )

# Verificação: quantas obs por votação?
message("Obs por votação (deve ter 6 votações):")
print(count(fig5_base, bill_label))

# ---- Summarise ----------------------------------------------
fig5_overall <- fig5_base |>
  group_by(bill_label) |>
  summarise(prop = mean(vote_with_fpa, na.rm = TRUE), .groups = "drop") |>
  mutate(panel = "Overall")

fig5_bytype <- fig5_base |>
  filter(tipo_predominante != "No agro funding") |>
  group_by(bill_label, tipo_predominante) |>
  summarise(prop = mean(vote_with_fpa, na.rm = TRUE), .groups = "drop") |>
  rename(panel = tipo_predominante) |>
  mutate(panel = as.character(panel))

fig5_data <- bind_rows(fig5_overall, fig5_bytype) |>
  mutate(
    panel = factor(panel,
                   levels = c("Overall", "Integrated Firms",
                              "Agriculture & Livestock", "Agroindustry")),
    panel = recode(panel,
                   "Integrated Firms"        = "Integrated",
                   "Agriculture & Livestock" = "Agriculture and livestock")
  )

# ---- Plot ---------------------------------------------------
bill_greys <- c(
  "MP 756/2016"  = "#D0D0D0",
  "MP 759/2016"  = "#B0B0B0",
  "MP 795/2017"  = "#909090",
  "MP 809/2017"  = "#686868",
  "PL 4148/2008" = "#404040",
  "PLP 163/2015" = "#181818"
)

fig5 <- ggplot(fig5_data,
               aes(x = bill_label, y = prop, fill = bill_label)) +
  geom_col(
    position  = position_dodge(width = 0.8),
    width     = 0.7,
    colour    = "white",
    linewidth = 0.2
  ) +
  facet_wrap(~panel, nrow = 2) +
  scale_fill_manual(values = bill_greys) +
  scale_y_continuous(
    limits = c(0, 1),
    breaks = c(0, .2, .4, .6, .8, 1),
    labels = c("0", ".2", ".4", ".6", ".8", "1")
  ) +
  labs(x = NULL, y = "Proportion of vote with FPA", fill = NULL) +
  theme_paper() +
  theme(
    axis.text.x      = element_blank(),
    axis.ticks.x     = element_blank(),
    legend.position  = "bottom",
    legend.direction = "vertical",
    legend.text      = element_text(size = 10)
  )

ggsave("Figure_5.tiff", fig5,
       width = 190, height = 150, units = "mm",
       dpi = 600, compression = "lzw", device = "tiff")

message("Saved: Figure_5.tiff")







# ============================================================
# 1. SPECIFICATION A — BINARY TOTAL AGRO DONOR  (unchanged)
#
# Model: IndDisFPA ~ Coalizao_GOV + deputado_agro_total
# ============================================================

# ---- Estimate --------------------------------------------------
rows_A <- list()
for (def in DEFS) {
  for (v in VOTES) {
    d <- df[df[[def]] == 1 & df$Id_Votacao == v, ]
    if (nrow(d) < 5 || var(d$IndDisFPA, na.rm = TRUE) == 0) next
    m  <- lm(IndDisFPA ~ Coalizao_GOV + deputado_agro_total, data = d)
    ct <- coeftest(m, vcov = vcovHC(m, type = "HC1"))
    r  <- as.data.frame(ct[, 1:2])
    colnames(r) <- c("estimate", "std.error")
    r$term       <- rownames(r)
    r$conf.low   <- r$estimate - 1.96 * r$std.error
    r$conf.high  <- r$estimate + 1.96 * r$std.error
    r$vote       <- vote_labels[as.character(v)]
    r$definition <- def
    rows_A[[length(rows_A) + 1]] <- r
  }
}
results_A <- do.call(rbind, rows_A) |>
  filter(term != "(Intercept)") |>
  mutate(
    term_label = ifelse(term == "Coalizao_GOV",
                        "Government Coalition",
                        "Overall agribusiness MP"),
    def_label  = factor(DEF_LBL[definition], levels = DEF_LBL),
    vote       = factor(vote, levels = vote_labels)
  )

# ---- Figure A --------------------------------------------------
fig_A <- ggplot(results_A,
                aes(x = estimate, xmin = conf.low, xmax = conf.high,
                    y = term_label, colour = def_label, shape = def_label)) +
  geom_vline(xintercept = 0, linetype = "dashed",
             colour = "black", linewidth = 0.4) +
  geom_pointrange(position = position_dodge(width = 0.5), size = 0.4) +
  scale_colour_manual(values = grey_palette_4) +
  scale_shape_manual(values = def_shapes) +
  facet_wrap(~vote, nrow = 2, scales = "free_x") +
  labs(x = "Coefficient (LPM)", y = NULL,
       colour = NULL, shape = NULL,
       title = "") +
  theme_paper()

ggsave("fig_specA.tiff", fig_A,
       width = 190, height = 130, units = "mm",
       dpi = 600, compression = "lzw", device = "tiff")

# ---- Tables A --------------------------------------------------
coef_map_A <- c(
  "(Intercept)"         = "(Intercept)",
  "Coalizao_GOV"        = "Government Coalition",
  "deputado_agro_total" = "Overall agribusiness MP"
)

make_table_A <- function(def, outfile, title_str) {
  models <- setNames(
    lapply(VOTES, function(v) {
      d <- df[df[[def]] == 1 & df$Id_Votacao == v, ]
      if (nrow(d) < 5) return(NULL)
      lm(IndDisFPA ~ Coalizao_GOV + deputado_agro_total, data = d)
    }),
    vote_labels
  )
  modelsummary(
    models,
    vcov     = lapply(models, function(m)
      if (!is.null(m)) vcovHC(m, type = "HC1") else NULL),
    coef_map = coef_map_A,
    stars    = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
    gof_map  = c("nobs", "r.squared", "adj.r.squared"),
    title    = title_str,
    output   = outfile
  )
  message("Saved: ", outfile)
}

make_table_A("lista_FPA",      "table_A1_listaFPA.docx",
             "Table A1 — LPM: overall agribusiness MP, lista_FPA (Rey 2021)")
make_table_A("Membro_FPA",     "table_A2_membroFPA.docx",
             "Table A2 — LPM: overall agribusiness MP, Membro_FPA (Official Congress, baseline)")
make_table_A("membro_fpa",     "table_A3_cascione.docx",
             "Table A3 — LPM: overall agribusiness MP, membro_fpa (Cascione 2018)")
make_table_A("diap_ruralista", "table_A4_diap.docx",
             "Table A4 — LPM: overall agribusiness MP, diap_ruralista (DIAP)")


# ============================================================
# 2. SPECIFICATION B — CONTINUOUS AGRO SHARE  (unchanged)
#
# Model: IndDisFPA ~ Coalizao_GOV + frac_agro_total
# ============================================================

make_table_B <- function(def, outfile, title_str) {
  models <- setNames(
    lapply(VOTES, function(v) {
      d <- df[df[[def]] == 1 & df$Id_Votacao == v, ]
      if (nrow(d) < 5) return(NULL)
      lm(IndDisFPA ~ Coalizao_GOV + frac_agro_total, data = d)
    }),
    vote_labels
  )
  modelsummary(
    models,
    vcov    = lapply(models, function(m)
      if (!is.null(m)) vcovHC(m, type = "HC1") else NULL),
    stars   = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
    gof_map = c("nobs", "r.squared", "adj.r.squared"),
    title   = title_str,
    output  = outfile
  )
  message("Saved: ", outfile)
}

make_table_B("lista_FPA",      "table_B1_listaFPA.docx",
             "Table B1 — LPM: agro share, lista_FPA (Rey 2021)")
make_table_B("Membro_FPA",     "table_B2_membroFPA.docx",
             "Table B2 — LPM: agro share, Membro_FPA (Official Congress, baseline)")
make_table_B("membro_fpa",     "table_B3_cascione.docx",
             "Table B3 — LPM: agro share, membro_fpa (Cascione 2018)")
make_table_B("diap_ruralista", "table_B4_diap.docx",
             "Table B4 — LPM: agro share, diap_ruralista (DIAP)")


# ============================================================
# 3. SPECIFICATION C — PREDOMINANT DONOR TYPE  *** REVISED ***
#
# Model: IndDisFPA ~ Coalizao_GOV + tipo_predominante
#
# tipo_predominante is a single factor with four levels:
#   "No agro funding"        [reference]
#   "Agriculture & Livestock"
#   "Agroindustry"
#   "Integrated Firms"
#
# Each dummy coefficient answers: compared to legislators with
# NO agribusiness funding, does being primarily funded by a given
# sub-sector predict divergence from FPA leadership?
#
# This replaces the previous three independent binary dummies
# (deputado_agropecuaria, deputado_agroindustria, deputado_vertical),
# which did not enforce mutual exclusivity and conflated the
# intensity of ties with their source.
# ============================================================

# ---- Sanity check: distribution per vote and definition ----
message("\n=== tipo_predominante by FPA definition (unique legislators) ===")
df |>
  filter(Id_Votacao == first(VOTES)) |>          # one row per legislator
  pivot_longer(cols = all_of(DEFS),
               names_to = "definition",
               values_to = "member") |>
  filter(member == 1) |>
  count(definition, tipo_predominante) |>
  pivot_wider(names_from = tipo_predominante,
              values_from = n, values_fill = 0) |>
  print()

# ---- Estimate --------------------------------------------------
rows_C <- list()
for (def in DEFS) {
  for (v in VOTES) {
    d <- df[df[[def]] == 1 & df$Id_Votacao == v, ]
    if (nrow(d) < 5 || var(d$IndDisFPA, na.rm = TRUE) == 0) next
    m  <- lm(IndDisFPA ~ Coalizao_GOV + tipo_predominante, data = d)
    ct <- coeftest(m, vcov = vcovHC(m, type = "HC1"))
    r  <- as.data.frame(ct[, 1:2])
    colnames(r) <- c("estimate", "std.error")
    r$term       <- rownames(r)
    r$conf.low   <- r$estimate - 1.96 * r$std.error
    r$conf.high  <- r$estimate + 1.96 * r$std.error
    r$vote       <- vote_labels[as.character(v)]
    r$definition <- def
    rows_C[[length(rows_C) + 1]] <- r
  }
}
results_C <- do.call(rbind, rows_C) |>
  filter(term != "(Intercept)") |>
  mutate(
    # Clean up auto-generated dummy names from factor
    term_label = case_when(
      term == "Coalizao_GOV"
      ~ "Government Coalition",
      term == "tipo_predominanteAgriculture & Livestock"
      ~ "Agriculture & Livestock\n(vs. no agro funding)",
      term == "tipo_predominanteAgroindustry"
      ~ "Agroindustry\n(vs. no agro funding)",
      term == "tipo_predominanteIntegrated Firms"
      ~ "Integrated Firms\n(vs. no agro funding)",
      TRUE ~ term
    ),
    term_label = factor(term_label, levels = c(
      "Government Coalition",
      "Agriculture & Livestock\n(vs. no agro funding)",
      "Agroindustry\n(vs. no agro funding)",
      "Integrated Firms\n(vs. no agro funding)"
    )),
    def_label = factor(DEF_LBL[definition], levels = DEF_LBL),
    vote      = factor(vote, levels = vote_labels)
  )

# ---- Figure C --------------------------------------------------
fig_C <- ggplot(results_C,
                aes(x = estimate, xmin = conf.low, xmax = conf.high,
                    y = term_label, colour = def_label, shape = def_label)) +
  geom_vline(xintercept = 0, linetype = "dashed",
             colour = "black", linewidth = 0.4) +
  geom_pointrange(position = position_dodge(width = 0.6), size = 0.4) +
  scale_colour_manual(values = grey_palette_4) +
  scale_shape_manual(values = def_shapes) +
  facet_wrap(~vote, nrow = 2, scales = "free_x") +
  labs(
    x       = "Coefficient (LPM)",
    y       = NULL,
    colour  = NULL,
    shape   = NULL,
    title   = "",
    caption = paste0(
      ""
    )
  ) +
  theme_paper() +
  theme(legend.position    = "bottom",
        legend.direction   = "horizontal",
        legend.justification = "center")

ggsave("fig_specC.tiff", fig_C,
       width = 190, height = 140, units = "mm",
       dpi = 600, compression = "lzw", device = "tiff")

# ---- Tables C --------------------------------------------------
# coef_map ensures consistent row ordering and clean labels across all tables
coef_map_C <- c(
  "(Intercept)"                                    = "(Intercept)",
  "Coalizao_GOV"                                   = "Government Coalition",
  "tipo_predominanteAgriculture & Livestock"        = "Agriculture & Livestock",
  "tipo_predominanteAgroindustry"                   = "Agroindustry",
  "tipo_predominanteIntegrated Firms"               = "Integrated Firms"
)

make_table_C <- function(def, outfile, title_str) {
  models <- setNames(
    lapply(VOTES, function(v) {
      d <- df[df[[def]] == 1 & df$Id_Votacao == v, ]
      if (nrow(d) < 5) return(NULL)
      lm(IndDisFPA ~ Coalizao_GOV + tipo_predominante, data = d)
    }),
    vote_labels
  )
  modelsummary(
    models,
    vcov     = lapply(models, function(m)
      if (!is.null(m)) vcovHC(m, type = "HC1") else NULL),
    coef_map = coef_map_C,
    stars    = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
    gof_map  = c("nobs", "r.squared", "adj.r.squared"),
    title    = title_str,
    notes    = paste0(
      ""
    ),
    output   = outfile
  )
  message("Saved: ", outfile)
}

make_table_C("lista_FPA",      "table_C1_listaFPA.docx",
             "Table C1 — LPM: predominant donor type, lista_FPA (Rey 2021)")
make_table_C("Membro_FPA",     "table_C2_membroFPA.docx",
             "Table C2 — LPM: predominant donor type, Membro_FPA (Official Congress, baseline)")
make_table_C("membro_fpa",     "table_C3_cascione.docx",
             "Table C3 — LPM: predominant donor type, membro_fpa (Cascione 2018)")
make_table_C("diap_ruralista", "table_C4_diap.docx",
             "Table C4 — LPM: predominant donor type, diap_ruralista (DIAP)")


# ============================================================
# END OF SCRIPT
# ============================================================

